home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SGI Freeware 2002 November
/
SGI Freeware 2002 November - Disc 2.iso
/
dist
/
fw_glimpse.idb
/
usr
/
freeware
/
src
/
glimpse-3.0
/
compress
/
string.c.z
/
string.c
Wrap
C/C++ Source or Header
|
1997-09-09
|
8KB
|
280 lines
/* Copyright (c) 1994 Burra Gopal, Udi Manber. All Rights Reserved. */
/*
* string.c: String table manipulation routines. Can be used to compute
* the dictionary as well as uncompress files.
*/
#include "defs.h"
/* Defined elsewhere; used in this file as the upper bound on string_table slots. */
extern int MAX_WORDS;
/*
 * Defined elsewhere; build_partial_string() treats bytes of a compressed
 * line that are below this value as special characters and every other
 * byte as the first byte of a two-byte word index.
 */
extern int RESERVED_CHARS;
/* Offset of the next unused byte in free_strtable; advanced by build_string(). */
int next_free_strtable = 0;
/*
 * Shared pool that build_string() packs words into when usemalloc is 0.
 * It is allocated on first use by build_string() with
 * AVG_WORD_LEN * DEF_MAX_WORDS bytes.
 */
char *free_strtable = NULL; /*[DEF_MAX_WORDS * AVG_WORD_LEN]; */
/*
 * Defined elsewhere; when non-zero build_string() malloc()s every word
 * separately instead of using the free_strtable pool.
 */
extern int usemalloc;
/*
 * dump_string: debugging aid only.
 *
 * Writes string_table[0 .. MAX_WORDS-1] to the file named string_file,
 * one entry per line.  The file named index_file is merely opened for
 * reading and closed again; nothing is read from it.
 *
 * Returns 1 on success, 0 if either file cannot be opened.
 */
int
dump_string(string_table, string_file, index_file)
	char **string_table;
	unsigned char *string_file, *index_file;
{
	FILE *out;
	FILE *idx;
	int slot;

	out = fopen(string_file, "w");
	if (out == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", string_file);
		return 0;
	}

	idx = fopen(index_file, "r");
	if (idx == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", index_file);
		fclose(out);
		return 0;
	}

	slot = 0;
	while (slot < MAX_WORDS) {
		fprintf(out, "%s\n", string_table[slot]);
		slot++;
	}

	fflush(out);
	fclose(out);
	fclose(idx);
	return 1;
}
/*
 * mystringread: reads one newline-terminated word from fp into str and
 * reports its length in the same pass (an fscanf+strlen in one scan).
 * Tied to the string-table file format: words separated by newlines.
 *
 * Return value:
 *   -1  EOF was hit before a newline (str holds whatever was read so far);
 *    0  the very first character was a newline, i.e. we are in the padded
 *       area of a block (str is the empty string);
 *   >0  for a complete word: its length INCLUDING the terminating '\0';
 *       for a word longer than MAX_WORD_LEN: MAX_WORD_LEN+1, with the
 *       rest of the line left unread in fp.
 *
 * str must have room for MAX_WORD_LEN+2 bytes.
 */
int
mystringread(fp, str)
	FILE *fp;
	char *str;
{
	int len = 0;
	int ch = 0;

	for (;;) {
		if (len > MAX_WORD_LEN) break;		/* word too long: stop here */
		ch = getc(fp);
		if (ch == EOF) break;
		if (ch != '\n') {
			str[len++] = ch;
			continue;
		}
		if (len == 0) break;			/* leading '\n' => padded area */
		str[len] = '\0';
		return len + 1;				/* count the terminator too */
	}

	str[len] = '\0';
	return (ch == EOF) ? -1 : len;
}
/*
 * build_string: loads words from stringfp into string_table, starting at
 * slot initialwordindex and never going beyond slot MAX_WORDS-1.
 *
 * bytestoread == -1: read until EOF; empty lines (block padding) are
 *                    skipped.
 * otherwise:         read until the values returned by mystringread() add
 *                    up to at least bytestoread, or until EOF or the first
 *                    empty line (start of the padded area) is seen.
 *
 * Storage of each word:
 *   usemalloc != 0: a private malloc(ret + 2) buffer;
 *   usemalloc == 0: the next ret + 2 bytes of the shared free_strtable
 *                   pool, which is allocated here on first use with
 *                   AVG_WORD_LEN * DEF_MAX_WORDS bytes.  Loading stops
 *                   (with a message on stderr) when the pool is exhausted
 *                   instead of writing past its end.
 *                   NOTE(review): this assumes no other file allocates
 *                   free_strtable with a different size -- confirm.
 *
 * Every word is first read into a local buffer of MAX_WORD_BUF bytes (as
 * the usemalloc path always did), so MAX_WORD_BUF is assumed to be at
 * least MAX_WORD_LEN+2.
 *
 * Returns the index one past the last slot that was filled.
 */
int
build_string(string_table, stringfp, bytestoread, initialwordindex)
	char *string_table[DEF_MAX_WORDS]; /*[MAX_WORD_LEN+2]; */
	FILE *stringfp;
	int bytestoread;
	int initialwordindex;
{
	int wordindex = initialwordindex;
	int numread = 0;
	int ret;
	int untileof = (bytestoread == -1);
	int poolsize = AVG_WORD_LEN * DEF_MAX_WORDS;
	char dummybuf[MAX_WORD_BUF];
	char *word;

	while ((wordindex < MAX_WORDS) && (untileof || (bytestoread > numread))) {
		if (!usemalloc) {
			if (free_strtable == NULL) free_strtable = (char *)malloc(poolsize);
			if (free_strtable == NULL) break;
		}

		ret = mystringread(stringfp, dummybuf);
		if (ret == -1) break;			/* EOF */
		if (ret == 0) {
			if (untileof) continue;		/* skip padding between blocks */
			break;				/* padded area ends this block */
		}

		if (usemalloc) {
			if ((word = (char *)malloc(ret + 2)) == NULL) break;
		}
		else {
			/* each word occupies ret + 2 bytes of the pool: make sure they exist */
			if ((next_free_strtable < 0) || (next_free_strtable > poolsize - (ret + 2))) {
				fprintf(stderr, "build_string: string pool exhausted at word %d\n", wordindex);
				break;
			}
			word = &free_strtable[next_free_strtable];
			next_free_strtable += ret + 2;
		}
		strcpy(word, dummybuf);

		string_table[wordindex] = word;
		wordindex ++;
		if (!untileof) numread += ret;		/* only the bounded mode needs the count */
	}
	return wordindex;
}
/*
 * load_matching_blocks: helper for build_partial_string().
 *
 * srcbuf is read as a sequence of unsigned shorts s0, s1, s2, ...; block
 * number b of the string file covers word indices s(b) .. s(b+1).  Every
 * block whose range contains at least one of wordnums[0..numwordnums-1]
 * and that is not yet flagged in loaded_string_table is loaded with
 * build_string() and then flagged.  Calling it again for the same block
 * is therefore harmless.
 *
 * srcbuf must be aligned on a short-int boundary.
 */
static void
load_matching_blocks(string_table, stringfp, srcbuf, srclen, blocksize, loaded_string_table, wordnums, numwordnums)
	char *string_table[DEF_MAX_WORDS];
	FILE *stringfp;
	unsigned char *srcbuf;
	int srclen;
	int blocksize;
	char loaded_string_table[STRING_FILE_BLOCKS];
	unsigned short *wordnums;
	int numwordnums;
{
	unsigned char *srcpos;
	unsigned char *srclimit;
	unsigned short srcinit, srcend;
	int blockindex;
	int wanted;
	int i;

	if (srclen < (int)sizeof(short)) return;	/* not even one boundary value */
	srcpos = srcbuf;
	srclimit = srcbuf + srclen;
	srcend = *((unsigned short *)srcpos);
	srcpos += sizeof(short);

	/* stop at the end of srcbuf (whole shorts only) or of loaded_string_table */
	for (blockindex = 0; (blockindex < STRING_FILE_BLOCKS) && (srcpos + sizeof(short) <= srclimit); blockindex++) {
		srcinit = srcend;
		srcend = *((unsigned short *)srcpos);
		srcpos += sizeof(short);

		/* include the block if any of the word indices fits within its range */
		wanted = 0;
		for (i=0; i<numwordnums; i++) {
			if ((wordnums[i] >= srcinit) && (wordnums[i] <= srcend)) {
				wanted = 1;
				break;
			}
		}
		if (!wanted) continue;
		if (loaded_string_table[blockindex]) continue;

		/* do not load from a wrong position: leave the block unflagged if the seek fails */
		if (fseek(stringfp, (long)blockindex * blocksize, SEEK_SET) != 0) continue;
		loaded_string_table[blockindex] = 1;
		build_string(string_table, stringfp, blocksize, srcinit);
	}
}

/*
 * Interprets srcbuf as a set of srclen/2 short integers. It looks for all the
 * short-integers encoding words in the matched line and loads only those blocks
 * of the string table. Note: srcbuf must be aligned on a short-int boundary.
 *
 * linebuf/linelen is the (compressed) matched line.  Bytes below
 * RESERVED_CHARS are skipped, including whole BEGIN_VERBATIM..END_VERBATIM
 * runs; every other byte starts a two-byte big-endian word code that is
 * passed through decode_index().  The decoded indices are collected in a
 * buffer of MAX_NAME_LEN entries; when the buffer fills up, the blocks for
 * the indices gathered so far are loaded and the buffer is reused, so lines
 * with more than MAX_NAME_LEN words are handled without overflowing it.
 *
 * Nothing is read beyond linebuf[linelen-1]; a two-byte code cut off by
 * the end of the line is ignored.
 *
 * Always returns 0.
 */
int
build_partial_string(string_table, stringfp, srcbuf, srclen, linebuf, linelen, blocksize, loaded_string_table)
	char *string_table[DEF_MAX_WORDS]; /* [MAX_WORD_LEN+2]; */
	FILE *stringfp;
	unsigned char *srcbuf;
	int srclen;
	unsigned char *linebuf;
	int linelen;
	int blocksize;
	char loaded_string_table[STRING_FILE_BLOCKS];
{
	unsigned short wordnums[MAX_NAME_LEN];
	int numwordnums = 0;
	int i;

	/*
	 * Find all the relevant wordnums in the line.
	 */
	i = 0;
	while (i < linelen) {
		if (linebuf[i] < RESERVED_CHARS) {
			if (linebuf[i] == BEGIN_VERBATIM) {
				if ((i + 1 < linelen) && ISASCII(linebuf[i+1])) {
					/* test the index first so linebuf[linelen] is never read */
					while ((i < linelen) && (linebuf[i] != END_VERBATIM)) i ++;
				}
				else i ++;	/* skip over the BEGIN_VERBATIM of non-ascii character */
				i ++;		/* skip over the non-ascii character OR END_VERBATIM: may overshoot linelen, which just ends the loop */
			}
			else i ++;		/* skip over the character encoding a special word OR a special character */
		}
		else {
			if (i + 1 >= linelen) break;	/* second byte of the code is missing */
			if (numwordnums >= MAX_NAME_LEN) {
				/* buffer full: load the blocks needed so far, then start over */
				load_matching_blocks(string_table, stringfp, srcbuf, srclen, blocksize, loaded_string_table, wordnums, numwordnums);
				numwordnums = 0;
			}
			wordnums[numwordnums] = (unsigned char)linebuf[i];	/* always big-endian compression */
			wordnums[numwordnums] <<= 8;
			wordnums[numwordnums] |= (unsigned char)linebuf[i+1];
			wordnums[numwordnums] = decode_index(wordnums[numwordnums]);	/* roundabout to avoid buserr */
			numwordnums ++;
			i += sizeof(short);
		}
	}

	load_matching_blocks(string_table, stringfp, srcbuf, srclen, blocksize, loaded_string_table, wordnums, numwordnums);
	return 0;
}
/*
 * pad_string_file: rewrites the string-table file `filename' so that no
 * word straddles a FILEBLOCKSIZE boundary, and writes "<filename>.index".
 *
 * The input is read line by line (one word per line).  Whenever the next
 * line would not fit into the current block, the block is filled up with
 * newlines and the line starts the next block.  The padded copy is written
 * to "<filename>.<pid>" and finally renamed over `filename'.
 *
 * "<filename>.index" receives, one number per line: the block size, the
 * index of the first word of every block, and last the total word count.
 * For an empty input file only the block size is written.
 *
 * FILEBLOCKSIZE must be a multiple of MIN_BLOCKSIZE, otherwise
 * MIN_BLOCKSIZE is used instead (with a message on stderr).
 *
 * Exits with status 2 if a file cannot be opened or if `filename' is too
 * long for the derived names to fit into MAX_NAME_LEN bytes.  A failing
 * rename is reported on stderr.  Returns 0.
 */
int
pad_string_file(filename, FILEBLOCKSIZE)
	unsigned char *filename;
	int FILEBLOCKSIZE;
{
	FILE *outfp, *infp, *indexfp;
	int offset = 0, len;
	char buf[MAX_NAME_LEN];		/* one input line */
	char tmpname[MAX_NAME_LEN];	/* "<filename>.index", then "<filename>.<pid>" */
	int pid = getpid();
	int i;
	unsigned short wordindex = 0;

	/* room for ".index" or "." plus the decimal pid, and the terminating NUL */
	if ((int)strlen((char *)filename) + 32 >= MAX_NAME_LEN) {
		fprintf(stderr, "file name too long: %s\n", filename);
		exit(2);
	}

	if ((infp = fopen((char *)filename, "r")) == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", filename);
		exit(2);
	}
	sprintf(tmpname, "%s.index", filename);
	if ((indexfp = fopen(tmpname, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", tmpname);
		fclose(infp);
		exit(2);
	}
	sprintf(tmpname, "%s.%d", filename, pid);
	if ((outfp = fopen(tmpname, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", tmpname);
		fclose(infp);
		fclose(indexfp);
		exit(2);
	}

	if ((FILEBLOCKSIZE % MIN_BLOCKSIZE) != 0) {
		fprintf(stderr, "invalid block size %d: changing to %d\n", FILEBLOCKSIZE, MIN_BLOCKSIZE);
		FILEBLOCKSIZE = MIN_BLOCKSIZE;
	}
	fprintf(indexfp, "%d\n", FILEBLOCKSIZE);

	buf[0] = '\0';
	if (fgets(buf, MAX_NAME_LEN, infp) == buf) {
		/* the first word always opens block 0 */
		len = strlen(buf);
		fputs(buf, outfp);
		fprintf(indexfp, "%d\n", wordindex);
		offset += len;
		wordindex ++;

		while (fgets(buf, MAX_NAME_LEN, infp) == buf) {
			len = strlen(buf);
			if (offset + len > FILEBLOCKSIZE) {
				/* fill up with newlines until the next block boundary */
				for (i=0; i<FILEBLOCKSIZE-offset; i++)
					putc('\n', outfp);
				fputs(buf, outfp);
				fprintf(indexfp, "%d\n", wordindex);
				offset = 0;
			}
			else fputs(buf, outfp);
			offset += len;
			wordindex ++;
		}
		fprintf(indexfp, "%d\n", wordindex);	/* total number of words */
	}

	fclose(infp);
	fclose(outfp);
	fclose(indexfp);

	/* replace the original without going through a shell */
	if (rename(tmpname, (char *)filename) != 0)
		fprintf(stderr, "cannot rename %s to %s\n", tmpname, filename);
	return 0;
}